import re
import time
import os
import requests

# Browser-like User-Agent sent with the HTTP request below.
headers = {
    'User-Agent':'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/121.0.0.0 Safari/537.36 Edg/121.0.0.0'
}

# Tag listing page to fetch: base URL followed by the percent-encoded tag slug.
iurl_base = 'https://buondua.com/tag/'
iurl_name = '%E7%96%AF%E7%8C%ABss-11613'
ipage = ''  # not used by the code visible in this script

# A timeout keeps the script from hanging forever on a stalled connection, and
# raise_for_status() fails loudly on HTTP errors instead of parsing an error
# page and silently reporting "no links found".
response = requests.get(iurl_base + iurl_name, headers=headers, timeout=30)
response.raise_for_status()
ihtml = response.text

# Collect the href of every gallery link on the page. Example anchor:
#   <a class="item-link popunder" href="/coser-疯猫ss-七了个三-女仆特工-54-photos-39686">
# Example of a full gallery URL on the same site:
#   https://buondua.com/coser-%E5%B0%8F%E9%9D%92%E8%8C%97-%E9%BB%91%E4%B8%9D%E5%A5%B3%E4%BB%86-42-photos-38682
idir_name = re.findall(r'<a class="item-link popunder" href="([^"]+)"', ihtml)
print(idir_name)

# Distinct hrefs as a set (unordered).
unique_urls = set(idir_name)

# Strip the 'https://www.4khd.com/content/' prefix from each distinct href.
# dict.fromkeys() de-duplicates while keeping first-seen page order, so the
# printed list is stable between runs (iterating a set of strings is not).
# NOTE(review): this prefix belongs to a different site than the one fetched
# above, and the sample href is a relative path, so replace() likely leaves
# every entry unchanged -- confirm the intended prefix.
trimmed_urls = [
    url.replace('https://www.4khd.com/content/', '')
    for url in dict.fromkeys(idir_name)
]

# Print the final result.
print(trimmed_urls)


